# This file includes a number of constants and programs
# we use throughout our other R files.

baseline_region <- "DE300"

# A palette of 25 mostly distinct colors
c25 <- c(
  "dodgerblue2", "#E31A1C", # red
  "green4",
  "#6A3D9A", # purple
  "#FF7F00", # orange
  "black", "gold1",
  "skyblue2", "#FB9A99", # lt pink
  "palegreen2",
  "#CAB2D6", # lt purple
  "#FDBF6F", # lt orange
  "gray70", "khaki2",
  "maroon", "orchid1", "deeppink1", "blue1", "steelblue4",
  "darkturquoise", "green1", "yellow4", "yellow3",
  "darkorange4", "brown"
)

# This function generates binned scatter plots, similar to the Stata function 'binscatter'
# (see: https://michaelstepner.com/binscatter/). It also has the advantage of handling high dimensional
# fixed effects using felm.
# Inputs:
#   dat = The data set to use
#   xaxis_measure = The measure you want on the x-axis of the binned scatter plot.
#         Optionally, you can pass a vector of two measures as xaxis_measure that have been generated
#         by randomly splitting the sample in two. The resulting scatter plot, then, will IV on one split using the other
#         thereby accounting for measurement error (practically this will not change the correlation, but will increase the slope).
#   yaxis_measure = The measure you want on the y-axis of the binned scatter plot
#   linear_controls = The things you want to control for linearly (see link above for a description of how this is handled)
#   fes = The FEs you want to control for
#   bins = The number of bins you want (defaults to 50)

# Outputs: ggplot object of the binscatter
binscatter <- function(dat, xaxis_measure, yaxis_measure, linear_controls=NULL, fes=NULL, bins=50){

  xaxis_label <- xaxis_measure
  yaxis_label <- yaxis_measure

  # If there are controls collapse them in a string
  if(!is.null(linear_controls)){controls_string <- paste(linear_controls, collapse=" + ")}
  else{controls_string <- "1"}

  # If there are fes collapse them in a string
  if(!is.null(fes)){fes_string <- paste(fes, collapse=" + ")}
  else{fes_string <- "0"}

  if(length(xaxis_measure) == 2){

    # If there are only linear controls
    if(!is.null(linear_controls) & is.null(fes)){
      shrink_model <- lm(as.formula(str_interp("${xaxis_measure[2]} ~ ${xaxis_measure[1]} + ${controls_string}")), data=dat)
    }
    # If there are only fes
    if(is.null(linear_controls) & !is.null(fes)){
      shrink_model <- felm(as.formula(str_interp("${xaxis_measure[2]} ~ ${xaxis_measure[1]} | ${fes_string} | 0 | 0")), data=dat)
    }
    # If there are both
    if(!is.null(linear_controls) & !is.null(fes)){
      shrink_model <- felm(as.formula(str_interp("${xaxis_measure[2]} ~  ${xaxis_measure[1]} + ${controls_string} | ${fes_string} | 0 | 0")), data=dat)
    }
    # If there are neither
    if(is.null(linear_controls) & is.null(fes)){
      shrink_model <- lm(as.formula(str_interp("${xaxis_measure[2]} ~ ${xaxis_measure[1]}")), data=dat)
    }

    dat$shrunk_x <- fitted(shrink_model)
    xaxis_measure <- "shrunk_x"
  }


  # If you have linear controls or FEs then residualize
  if(!is.null(controls_string) | !is.null(fes)){

    est_x <- felm(as.formula(str_interp("${xaxis_measure} ~ ${controls_string} | ${fes_string} | 0 | 0")), data=dat)
    dat$resid_x <- est_x$residuals + mean(dat[[xaxis_measure]])

    est_y <- felm(as.formula(str_interp("${yaxis_measure} ~ ${controls_string} | ${fes_string} | 0 | 0")), data=dat)
    dat$resid_y <- est_y$residuals + mean(dat[[yaxis_measure]])

    xaxis_measure <- "resid_x"
    yaxis_measure <- "resid_y"

  }

  indiv_level_cor <- cor(dat[[xaxis_measure]], dat[[yaxis_measure]])

  model <- lm(dat[[yaxis_measure]] ~ dat[[xaxis_measure]]) %>% broom::tidy()

  indiv_level_slope <- model$estimate[2]
  indiv_level_slope_SE <- model$std.error[2]
  indiv_level_yint <- model$estimate[1]

  p <- dat %>%
    mutate(x_axis = ntile(!!sym(xaxis_measure), bins)) %>%
    group_by(x_axis) %>%
    summarise(
      y_axis = mean(!!sym(yaxis_measure)),
      x_axis = mean(!!sym(xaxis_measure))) %>%
    ungroup %>%
    ggplot(., aes(x=x_axis, y=y_axis)) +
    geom_point(size=3) +
    geom_smooth(method='lm', formula = y ~ x, se=FALSE, size=0.8) +
    theme_classic() +
    labs(
      x = xaxis_label,
      y = yaxis_label,
      title = str_interp("Individual Level Corr = ${sprintf(indiv_level_cor, fmt='%#.3f')}
Slope = ${sprintf(indiv_level_slope, fmt='%#.3f')} (${sprintf(indiv_level_slope_SE, fmt='%#.3f')})
Y-Int = ${sprintf(indiv_level_yint, fmt='%#.3f')}"))
  
  return(p)
}
